/*
 * gmm.c
 * Tommaso Polonelli
 *
 * Copyright (C) 2016 ETH Zurich, University of Bologna
 * Copyright (C) 2018 Tommaso Polonelli
 *
 * This software may be modified and distributed under the terms
 * of the MIT license.  See the LICENSE file for details.
 *
 * Created on: May 25, 2020
 *
 */

#include "gmm.h"
#include "fixed_p.h"
#include <string.h>

/*********** Constants  ****************/

/* debug */
/* PRINT_IMG: when non-zero, the per-pixel result is stored in the global
 * `mask` array and printed by BGSubtract() instead of being written back
 * over the input frame.
 */
#define PRINT_IMG	(0)
/* PERF: when non-zero, BGSubtract() reads the active-cycle counter before and
 * after its work and prints the difference.
 */
#define PERF		(0)


/* Operation block size: number of pixels handled by one SubtractPixel() call
 * and moved by one external-RAM read/write.
 * A larger block needs a larger working buffer
 * (sizeof(GMM) * MaxModes * BLOCK_SIZE bytes) but reduces the number of
 * transfers. It expands to the run-time variable `Width`, i.e. one image row.
 */
#define BLOCK_SIZE Width

/* Left shift applied to a Fixed value before FIXED2INT() when it is packed
 * into one byte of a GMM entry (and the matching right shift when unpacked):
 * qW for the weight, qM for the mean, qS for the sigma field.
 */
#define qW 7
#define qM 0
#define qS 2

/* Fixed -> packed integer (the caller narrows the result to unsigned char) */
#define STOREW(x)	FIXED2INT(((x) << (qW)))
#define STOREM(x)	FIXED2INT(((x) << (qM)))
#define STORES(x)	FIXED2INT(((x) << (qS)))

/* Packed integer -> Fixed (inverse scaling of the STORE* macros) */
#define GETW(x)		(INT2FIXED(x) >> (qW))
#define GETM(x)		(INT2FIXED(x) >> (qM))
#define GETS(x)		(INT2FIXED(x) >> (qS))

/*********** Structures *************/
/* One packed mixture component as it is kept in the working buffer and in
 * external RAM. Each field is a Fixed value scaled by the corresponding
 * STORE* macro and narrowed to one byte; GET* macros restore the Fixed value.
 * The field order is part of the storage layout.
 */
typedef struct{
	/* spread term, scaled with qS; SubtractPixel() compares
	 * Threshold * sigma against the squared pixel distance */
	unsigned char sigma;
	/* mean pixel value, scaled with qM */
	unsigned char mu;
	/* mixture weight, scaled with qW; GetGMM() counts an entry as an
	 * active mode only when this field is > 0 */
	unsigned char weight;
}GMM;


/*********** Declarations *************/

/*------ Global variables  -----------*/
#if PRINT_IMG
/* Debug output mask, one int per pixel. It holds 324*244 entries;
 * NOTE(review): a frame with more pixels than that would overrun it -
 * presumably this matches the camera resolution, confirm. */
int mask [324*244];
#endif

/* Frame geometry set by InitModel(): Size = Height * Width (pixels) */
static RT_FC_TINY_DATA int Height;
static RT_FC_TINY_DATA int Width;
static RT_FC_TINY_DATA int Size;
/* Learning rate: FIXED_ONE / history length, computed in InitModel() */
static RT_FC_TINY_DATA Fixed alpha;
/* alpha * complexity_prior; subtracted from every weight on each update */
static RT_FC_TINY_DATA Fixed prune;
/* Number of GMM entries reserved per pixel */
static RT_FC_TINY_DATA uint8_t MaxModes;
/* Address of the model storage returned by pi_ram_alloc(); InitModel() only
 * allocates and clears the storage while this is still 0 */
static RT_L2_DATA uint32_t hyper_buff;
/* RAM device handle passed to InitModel(); not static, so it has external
 * linkage */
struct RT_FC_TINY_DATA pi_device *ram;

/* FIXED_ONE - alpha, cached by InitModel() */
static RT_FC_TINY_DATA Fixed un_men_alpha;

/* Working buffer holding the packed GMM entries of the block being processed;
 * allocated and released inside BGSubtract() */
static unsigned char *readwrite;



/* GMM Parameters */
/* Match test: squared distance to the mean must be < Threshold * sigma */
static RT_FC_TINY_DATA Fixed Threshold = INT2FIXED(50);
/* Initial sigma value given to a newly created mode */
static RT_FC_TINY_DATA Fixed variance = INT2FIXED(36);
/* Factor multiplied with alpha to obtain `prune` */
static RT_FC_TINY_DATA Fixed complexity_prior = FLOAT2FIXED(0.05f);
/* Cumulative weight below which further modes still count as background */
static RT_FC_TINY_DATA Fixed BG_Th = FLOAT2FIXED(0.75f);



/*------ Function prototypes ---------*/


/*********** Functions   ****************/
void InitModel(int hystory, int w, int h, uint8_t Modes, struct pi_device *ram_){

	int i;
	uint32_t hyper_p;
	uint8_t *buff;

	//Init variables
	Width = w;
	Height = h;
	Size = (Height*Width);
	alpha = FIXED_DIV(FIXED_ONE,INT2FIXED(hystory));

	prune = FIXED_MUL(alpha,complexity_prior);
	un_men_alpha = (FIXED_ONE-alpha);

	MaxModes = Modes;
	ram = ram_;

	if(hyper_buff == 0){

		// Init hyper at cold boot
		if (pi_ram_alloc(ram, &hyper_buff, (uint32_t) Size*MaxModes*sizeof(GMM)))
		{
			printf("Ram malloc failed !\n\r");
			pmsis_exit(-4);
		}
		else
		{
			printf("Ram allocated : %lx %ld.\n\r", hyper_buff, (uint32_t) Size*MaxModes*sizeof(GMM));
		}

		/* Init model */
		hyper_p = hyper_buff;
		buff = (uint8_t *) pmsis_l2_malloc((uint32_t) Size);
		if (buff == NULL)
		{
			printf("buff alloc failed !\n\r");
			pmsis_exit(-1);
		}
		memset(buff,0,Size);
		printf("GMM INIT: ");
		for(i = 0; i < (int)(((int)MaxModes)*sizeof(GMM)); i++){
			pi_ram_write(ram, hyper_p, (uint8_t *)buff, (uint32_t) Size);
			hyper_p += Size;
			printf(".");
		}
		printf("\n\r");
		pmsis_l2_malloc_free(buff, (uint32_t) Size);

		printf("GMMOK\n\r");

	}else{

		printf("GMM_READY\n\r");

	}



}

/**
 * Drop the reference to the per-block working buffer.
 *
 * BGSubtract() obtains `readwrite` from pi_l2_malloc() and hands it back with
 * pi_l2_free() before it returns, so no allocation is outstanding when this
 * function runs. Passing the pointer to pi_fc_l1_free() here would free the
 * same memory a second time and through a different allocator than the one
 * that produced it; the pointer is therefore only cleared.
 */
void ClearModel(void){

	readwrite = NULL;

}

/**
 * Unpack the mixture of one pixel from the working buffer into Fixed arrays.
 *
 * @param P        offset, in GMM entries, of the current block inside the
 *                 model storage
 * @param Block_C  index of the pixel inside the current block; when it is 0
 *                 the whole block is first read from RAM into `readwrite`
 * @param FlW      out: weights, MaxModes elements
 * @param FlM      out: means, MaxModes elements
 * @param FlS      out: sigma values, MaxModes elements
 * @return number of entries of this pixel whose stored weight is > 0
 */
static inline int GetGMM (long P, int Block_C, Fixed *FlW , Fixed *FlM, Fixed *FlS){

	const GMM *entry;
	int k;
	int active = 0;

	/* First pixel of a block: refill the working buffer from RAM */
	if (Block_C == 0){
		const uint32_t src = (uint32_t)(hyper_buff + P*sizeof(GMM));
		pi_ram_read(ram, src, (uint8_t *)readwrite, (uint32_t)(sizeof(GMM)*MaxModes*BLOCK_SIZE));
	}

	/* Entries of this pixel start at MaxModes * Block_C */
	entry = ((const GMM *)readwrite) + (MaxModes*Block_C);

	/* Convert the packed bytes back to Fixed values */
	for (k = 0; k < MaxModes; k++, entry++){
		FlW[k] = ((Fixed)GETW(entry->weight));
		FlM[k] = ((Fixed)GETM(entry->mu));
		FlS[k] = ((Fixed)GETS(entry->sigma));
		if (entry->weight > 0){
			active++;
		}
	}

	return active;

}

/* Clamp an already scaled value to the 0..255 range of a packed GMM field. */
static inline unsigned char GmmSatU8(Fixed v){
	if (v < 0){
		return 0;
	}
	if (v > 255){
		return 255;
	}
	return (unsigned char)v;
}

/**
 * Pack the mixture of one pixel back into the working buffer and, on the last
 * pixel of a block, write the whole block to RAM.
 *
 * @param P        offset, in GMM entries, of the current block inside the
 *                 model storage
 * @param Block_C  index of the pixel inside the current block
 * @param Nmodes   number of leading modes to store; entries beyond it keep
 *                 whatever the working buffer already holds
 * @param FlW      weights to store
 * @param FlM      means to store
 * @param FlS      sigma values to store
 *
 * Each value is scaled with the matching STORE* macro and saturated to
 * 0..255 before it is narrowed to one byte. A plain cast would wrap modulo
 * 256 when the scaled value does not fit, which turns a large sigma into an
 * arbitrary small one.
 */
static inline void StoreGMM (long P, int Block_C, int Nmodes, Fixed *FlW , Fixed *FlM, Fixed *FlS){
	GMM *Gauss;
	int i,j=0;
	uint32_t hyper_p = hyper_buff;

	Gauss = (GMM *)readwrite;

	/* conversion from Fixed to the packed one-byte representation */
	for (i = (MaxModes*Block_C); i < ((MaxModes*Block_C) + Nmodes); i++){
		Gauss[i].weight = GmmSatU8(STOREW(FlW[j]));
		Gauss[i].mu     = GmmSatU8(STOREM(FlM[j]));
		Gauss[i].sigma  = GmmSatU8(STORES(FlS[j]));
		j++;
	}

	/* Last pixel of the block: flush the working buffer to RAM */
	if (Block_C == (BLOCK_SIZE - 1)){
		hyper_p += P*sizeof(GMM);
		pi_ram_write(ram, hyper_p, (uint8_t *)readwrite, (uint32_t)(sizeof(GMM)*MaxModes*BLOCK_SIZE));
	}

}

/**
 * Classify every pixel of one block and update its mixture model.
 *
 * @param P      offset, in GMM entries, of the block inside the model storage
 *               (forwarded to GetGMM()/StoreGMM())
 * @param pix    BLOCK_SIZE input pixels; unless PRINT_IMG is set each pixel is
 *               overwritten with the result: 0 when it matched one of the
 *               background modes, 1 otherwise
 * @param out_p  index of the block's first pixel in the frame; only used to
 *               address `mask` when PRINT_IMG is set
 *
 * For each pixel the modes are loaded, the first mode whose squared distance
 * to the pixel is below Threshold * sigma is updated and bubbled up so the
 * weights stay in descending order, the weights are renormalised, and a new
 * mode is created when nothing matched.
 */
static inline void SubtractPixel(long P, uint8_t *pix, int out_p){
	int i, j, Nmodes, BGGaussians, Fit, z, modes, Block_C;
	Fixed sum, k, sumWeights, tempW, tempM, tempS, S, distance, Frame;
	Fixed FlW [MaxModes], FlM [MaxModes], FlS [MaxModes];
	uint8_t thr;

	for(Block_C = 0;Block_C < BLOCK_SIZE;Block_C++){

		/* Init vars */
		Fit = 0;
		sumWeights = 0;
		thr = 1;
		BGGaussians = 0;
		sum = 0;
		Frame = INT2FIXED(pix[Block_C]);

		/* get from L3 - copy if needed */
		modes = GetGMM (P, Block_C, FlW , FlM, FlS);

		/* Count the leading modes that form the background: modes are
		 * accumulated until their summed weight reaches BG_Th */
		for (z=0; z < modes; z++){
			if (sum < BG_Th){
				sum += FlW[z];
				BGGaussians++;
			}
		}

		/* BG subtraction and update.
		 * NOTE(review): matching and non-matching modes both receive
		 * "+ alpha - prune", so the "< prune" pruning branches below
		 * look unreachable with the current parameters - confirm whether
		 * the non-matching update was meant to omit "+ alpha". */
		Nmodes = modes;
		for (i = 0; i < modes; ++i){

			if (!Fit){
				/* squared distance between the mode mean and the pixel */
				distance = (FlM[i]-Frame);
				distance = FIXED_MUL(distance,distance);
				j = FIXED_MUL(Threshold,FlS[i]);
				if(distance < j){

					/* the pixel matches this Gaussian */
					Fit=1;

					/* background only if the Gaussian is one of the
					 * background modes counted above */
					if(i < BGGaussians)	thr=0;

					/* update distribution */
					FlW[i] = FIXED_MUL(un_men_alpha,FlW[i]) + alpha - prune;
					k = FIXED_DIV(FIXED_ONE,FlW[i]);
					FlM[i] = FIXED_MUL(un_men_alpha,FlM[i]) + FIXED_MUL(k,FIXED_MUL(alpha,Frame));
					FlS[i] = FIXED_MUL(un_men_alpha,FlS[i]) + FIXED_MUL(k,FIXED_MUL(alpha,distance));

					sumWeights += FlW[i];

					/* Bubble the updated mode up so weights stay in
					 * descending order */
					for (j=i; j > 0; j--){
						if (FlW[j] > FlW[j-1]){
							tempW = FlW[j];
							tempM = FlM[j];
							tempS = FlS[j];
							FlW[j] = FlW[j-1];
							FlM[j] = FlM[j-1];
							FlS[j] = FlS[j-1];
							FlW[j-1] = tempW;
							FlM[j-1] = tempM;
							FlS[j-1] = tempS;
						}
					}
				}else{
					FlW[i] = FIXED_MUL(un_men_alpha,FlW[i]) + alpha - prune;
					if(FlW[i] < prune){
						FlW[i]= 0;
						Nmodes --;
					}
					sumWeights += FlW[i];
				}
			}else{
				FlW[i] = FIXED_MUL(un_men_alpha,FlW[i]) + alpha - prune;
				if(FlW[i] < prune){
					FlW[i] = 0;
					Nmodes --;
				}
				sumWeights += FlW[i];
			}
		}

		/* Renormalize weights so they sum to 1 */
		for (i = 0; i < Nmodes; i++)	{
			FlW[i] = FIXED_DIV(FlW[i],sumWeights);
		}

		/* Create new mode if match not found */
		if (!Fit){

			if (Nmodes == MaxModes){
				/* model is full: replace the last (lowest weight) mode */
				FlW[MaxModes-1] = alpha;
				FlM[MaxModes-1] = Frame;
				FlS[MaxModes-1] = variance;
			}else{
				FlW[Nmodes] = alpha;
				FlM[Nmodes] = Frame;
				FlS[Nmodes] = variance;
				Nmodes++;
			}

			/* a lone mode carries the whole weight */
			if (Nmodes==1)
				FlW[Nmodes-1] = FIXED_ONE;

			S = 0;
			for(j = 0; j < Nmodes; j++){
				S += FlW[j];
			}

			for(j = 0; j < Nmodes; j++){
				FlW[j] = FIXED_DIV(FlW[j],S);
			}

			/* Bubble the new mode up. It sits at index Nmodes-1, so the
			 * scan starts there: starting at Nmodes would read (and
			 * possibly swap in) an element past the valid modes, which
			 * is outside the arrays when Nmodes == MaxModes. */
			for (j=Nmodes-1; j > 0; j--){
				if (FlW[j] > FlW[j-1]){
					tempW = FlW[j];
					tempM = FlM[j];
					tempS = FlS[j];
					FlW[j] = FlW[j-1];
					FlM[j] = FlM[j-1];
					FlS[j] = FlS[j-1];
					FlW[j-1] = tempW;
					FlM[j-1] = tempM;
					FlS[j-1] = tempS;
				}
			}
		}

#if PRINT_IMG
		/* out mask - BINARY ! */
		mask[out_p + Block_C] = thr;
#else
		pix[Block_C] = thr;
#endif
		/* store to L3, copy if needed */
		StoreGMM(P, Block_C, Nmodes, FlW, FlM, FlS);

	}

}

/**
 * Run background subtraction on one frame and update the model.
 *
 * @param Frame  Size (= Width * Height) input pixels. Unless PRINT_IMG is set
 *               every pixel is overwritten by SubtractPixel() with its
 *               result (0 or 1).
 *
 * A working buffer for one block of packed modes is allocated for the
 * duration of the call and released before returning; the frame is then
 * processed block by block. InitModel() must have been called first, since
 * Size, MaxModes and BLOCK_SIZE are read from the values it sets.
 */
void BGSubtract(unsigned char *Frame){
	long pos;
	int i;

#if PERF
	uint32_t tt;
	pi_perf_conf(1 << PI_PERF_CYCLES | 1 << PI_PERF_ACTIVE_CYCLES);
	pi_perf_start();
	tt = pi_perf_read(PI_PERF_ACTIVE_CYCLES);
#endif

	readwrite = (uint8_t *)pi_l2_malloc((uint32_t) sizeof(GMM)*MaxModes*BLOCK_SIZE);
	if (readwrite == NULL)
	{
		printf("buff alloc failed !\n");
		pmsis_exit(-1);
	}
	printf("OK!\n");

	/* One SubtractPixel() call per block; pos is the block's offset in
	 * GMM entries (MaxModes entries per pixel) */
	for(i = 0; i < Size; i += BLOCK_SIZE){
		pos=i*MaxModes;
		SubtractPixel(pos, &Frame[i], i);
	}

	pi_l2_free(readwrite, (uint32_t) sizeof(GMM)*MaxModes*BLOCK_SIZE);
	/* Do not leave a dangling pointer in the file-scope variable */
	readwrite = NULL;

#if PERF
	pi_perf_stop();
	printf("BGSubtract Cycles %d \n",(int)(pi_perf_read(PI_PERF_ACTIVE_CYCLES)-tt));
#endif

#if PRINT_IMG
	for(i = 0; i < Size; i++){
		printf("%d ,",mask[i]);
	}
	printf("\n");
#endif

}




